clear all
cap log close 
set memory 700m
set matsize 5500
pause off

*Program written by Dominick Bartelme 6/6/2012
*New data on institutions, technology diffusion, and fraction of population of European descent 

*Local variable for user.  If you want to run on your computer, you have to change the local user AND provide
*the alternative directories whenever there is a directory change.

local user=1

if `user'==1 {
cd "C:\Documents and Settings\Latitude\Desktop\Yuriy Data 6-6-2012"
}
else {

}

*Importing country codes and country names
use country_code_4_Dominick.dta, clear
keep if priority==1
sort countrycode
save final_dataset, replace


*******************************************
*Fraction of Population of European Descent
*******************************************

*AJR (2001): fraction of population of European descent in 1900

use maketable1, clear
rename shortnam countrycode

*Making countrycodes compatible
replace countrycode ="GER" if countrycode == "DEU"

sort countrycode
keep countrycode euro1900
merge countrycode using final_dataset
drop _merge

*Replacing European descent variable for countries that we know
replace euro1900 =100 if countrycode == "SCG" | countrycode =="ALB" | countrycode == "IRL" | countrycode == "SVN"
replace euro1900 =0 if countrycode == "KHM"
drop if country=="" 

sort countrycode
save final_dataset, replace


*Fearon (2003): different ethnic groups in each country
*need to aggregate sometimes different definitions of ethnicities into a single number for european descent

use egroupsrepdata, clear
rename country c
keep ccode c group gpro
sort ccode
tempfile temp
save `temp', replace

*Converting ccode into matching country codes
insheet using cow_country.csv, clear
duplicates drop
drop if countrycode == "NTH" | countrycode =="PAR"
sort ccode

merge ccode using `temp'
drop if countrycode=="GER"
replace countrycode="GER" if countrycode=="GFR"
drop if countrycode==""
drop _merge ccode
sort countrycode

merge countrycode using final_dataset
drop if _merge==1
drop _merge c
sort countrycode group
save `temp', replace

*Merging with a coding of European and non-European ethniciies, as well as mixed European ancestry
insheet using ethnicity.csv, comma clear
sort countrycode group
merge countrycode group using `temp'
drop _merge

replace mixed=1 if countrycode == "PRY" & group=="MESTIZO"
replace euro_eth=1 if countrycode == "NLD" & group=="DUTCH"
replace mixed=0 if mixed==. & gpro !=.
replace euro_eth=0 if euro_eth==. & gpro !=.
drop if gpro==.

*Calculating percent of population of European ancestry, not including mixed ancestry
egen e1 = sum(gpro), by(countrycode euro_eth mixed)
duplicates drop countrycode euro_eth mixed, force
gen percent_euro = e1 if euro_eth==1
replace percent_euro=0 if euro_eth==0

*Calculating percent of population with European ancestry, including mixed ancestry as 1/2 European
gen percent_euro_m = .5*e1 if mixed==1
replace percent_euro_m =e1 if euro_eth==1
replace percent_euro_m =0 if percent_euro_m ==.

collapse(sum) percent_euro percent_euro_m, by(countrycode country euro1900 priority_flag)
label var percent_euro "Percent European Descent, not including mixed"
label var percent_euro_m "Percent European Descent, counting mixed as 1/2 European"

keep countrycode percent_euro percent_euro_m
sort countrycode
merge countrycode using final_dataset
drop _merge
sort country
save final_dataset, replace



*********************
*Technology Diffusion
*********************

*Royalty Imports and Exports from the UN

insheet using royalty_import_export.csv, comma clear
keep period reporter tradeflow tradevalue
drop if period>2005

egen av_imports = mean(tradevalue) if tradeflow=="Import" , by(reporter)
egen av_exports = mean(tradevalue) if tradeflow=="Export" , by(reporter)
duplicates drop reporter av_imports av_exports, force
collapse(sum) av_imports av_exports, by (reporter)
rename reporter country
label var av_imports "Average Royalty Payments to Other Countries, 2000-2005"
label var av_exports "Average Royalty Payments Received from Other Countries, 2000-2005"

*Have to manually align country names correctly
replace country ="Congo, Dem. Rep."  if country=="Dem. Rep. of the Congo"
replace country ="Czech Republic"  if country=="Czech Rep"
replace country ="Dominican Republic" if country=="Dominican Rep."
replace country ="Macedonia" if country=="TFYR Macedonia"
replace country ="Moldova" if country=="Rep. of Moldova"
replace country ="Russia" if country=="Russian Federation"
replace country ="Slovak Republic" if country=="Slovakia"
replace country ="Tanzania" if country=="United Rep. of Tanzania"
replace country ="United States" if country=="USA"
sort country

merge country using final_dataset
drop if _merge == 1
drop _merge 
sort country
save final_dataset, replace

*Volume of Trade with OECD countries, and Machinery imports and exports
insheet using oecd_total.csv, comma clear
egen oecd_imp = mean(value) if flow=="Imports", by(partnercountry)
egen oecd_exp = mean(value) if flow=="Exports", by(partnercountry)
duplicates drop partnercountry oecd_imp oecd_exp, force
collapse(sum) oecd_imp oecd_exp, by(partnercountry)
label var oecd_imp "Total Value of Imports from OECD countries, 1995-2000"
label var oecd_exp "Total Value of Exports from OECD countries, 1995-2000"
rename partnercountry country
sort country
save `temp', replace

insheet using oecd_machinery.csv, comma clear
egen oecd_mach_imp = mean(value) if flow=="Imports", by(partnercountry)
egen oecd_mach_exp = mean(value) if flow=="Exports", by(partnercountry)
duplicates drop partnercountry oecd_mach_imp oecd_mach_exp, force
collapse(sum) oecd_mach_imp oecd_mach_exp, by(partnercountry)
label var oecd_mach_imp "Value of Machinery and Equipment Imports from OECD countries, 1995-2000"
label var oecd_mach_exp "Value of Machinery and Equipment Exports from OECD countries, 1995-2000"
rename partnercountry country
sort country
merge country using `temp'
drop _merge


*Have to manually align country names correctly
replace country ="Congo, Dem. Rep."  if country=="Congo, The Democratic Republic of the "
replace country ="Congo, Republic of"  if country=="Congo"
replace country ="Gambia, The" if country=="Gambia"
replace country ="Moldova" if country=="Moldova, Republic of "
replace country ="Iran" if country=="Iran, Islamic Republic of"
replace country ="Korea, Dem. Rep." if country=="Korea, Democratic People's Republic of"
replace country ="Korea, Republic of" if country=="Korea"
replace country ="Laos" if country=="Lao People's Democratic Republic"
replace country ="Macedonia" if country=="Macedonia, The Former Yugoslav Republic of "
replace country ="Syria" if country=="Syrian Arab Republic"
replace country ="Taiwan" if country=="Chinese Taipei"
replace country ="Trinidad &Tobago" if country=="Trinidad and Tobago"
replace country ="Russia" if country=="Russian Federation"
replace country ="Tanzania" if country=="Tanzania, United Republic of"
replace country ="Vietnam" if country=="Viet Nam"

sort country
merge country using final_dataset
drop if _merge==1
drop _merge
sort countrycode
save final_dataset, replace


*Migration Data

*preliminary import of PWT GDP data and making the weights
use cross-country-data, clear
egen world_gdp = sum(pwt_rgdpch)
gen weight = pwt_rgdpch/world_gdp
drop if weight==.
keep countrycode country weight
sort countrycode
tempfile temp
save `temp', replace

*Importing Emigration by origin country
insheet using migration.csv, comma clear
rename statecode countrycode
replace countrycode="ZAR" if countrycode =="COD"
replace countrycode = "GER" if countrycode =="DEU"
rename deu ger
rename cod zar
sort countrycode 
merge countrycode using `temp'
sort _merge countrycode

*need to drop variables associated with countries for which we don't have gdp data
drop abw	aia	and	ant	asm	atg	bhs	blz	bmu	brb	brn	cok	com	cpv	cym	dma	flk	fro	fsm	gib	glp	gnq	grd	grl	guf	gum	hkg	kir	kna	lca	lie	mac	mco	mdv	mhl	mlt	mmr	mnp	msr	mtq	myt	ncl	nfk	niu	nru	plw	png	pri	pse	pyf	qat	reu	shn	slb	smr	spm	stp	sur	syc	tca	tkl	tls	ton	tuv	vct	vgb	vir	vut	wlf	wsm
drop if _merge==1
drop _merge origincountries
aorder
order country weight countrycode

*Making matrices
mkmat weight
mkmat afg	ago	alb	are	arg	arm	aus	aut	aze	bdi	bel	ben	bfa	bgd	bgr	bhr	bih	blr	bol	bra	btn	bwa	caf	can	che	chl	chn	civ	cmr	cog	col	cri	cub	cyp	cze	dji	dnk	dom	dza	ecu	egy	eri	esp	est	eth	fin	fji	fra	gab	gbr	geo	ger	gha	gin	gmb	gnb	grc	gtm	guy	hnd	hrv	hti	hun	idn	ind	irl	irn	irq	isl	isr	ita	jam	jor	jpn	kaz	ken	kgz	khm	kor	kwt	lao	lbn	lbr	lby	lka	lso	ltu	lux	lva	mar	mda	mdg	mex	mkd	mli	mng	moz	mrt	mus	mwi	mys	nam	ner	nga	nic	nld	nor	npl	nzl	omn	pak	pan	per	phl	pol	prk	prt	pry	rom	rus	rwa	sau	scg	sdn	sen	sgp	sle	slv	som	svk	svn	swe	swz	syr	tcd	tgo	tha	tjk	tkm	tto	tun	tur	twn	tza	uga	ukr	ury	usa	uzb	ven	vnm	yem	zaf	zar	zmb	zwe, matrix(X)

matrix i = (weight'*X)'
matrix e = X*weight
svmat i
svmat e
rename i1 immigrant
rename e1 emigrant

label var immigrant "Stock of immigrants, weighted by GDP/capita of country of origin"
label var emigrant "Stock of emigrants, weighted by GDP/capita of destinatio country"
keep countrycode immigrant emigrant
sort countrycode
merge countrycode using final_dataset
drop _merge
save final_dataset, replace